library("gridExtra")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::combine() masks gridExtra::combine()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library("ggplot2")
library("highcharter")
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
First, load the 2019 happiness data and the 2015 country-to-region lookup.
# Load the 2019 data in full; from the 2015 file keep only the columns needed
# to look up each country's region.
df_2019 <- read.csv(
  file = "/Users/lubis/Documents/study/dataanalysis/hackathon/world-happiness/2019.csv",
  header = TRUE
)
df_2015 <- select(
  read.csv(
    file = "/Users/lubis/Documents/study/dataanalysis/hackathon/world-happiness/2015.csv",
    header = TRUE
  ),
  Country, Region
)
Check the type of each column.
# Print a compact summary of df_2019: one line per column with its type.
utils::str(df_2019)
## 'data.frame': 156 obs. of 9 variables:
## $ Overall.rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Country.or.region : Factor w/ 156 levels "Afghanistan",..: 44 37 106 58 99 134 133 100 24 7 ...
## $ Score : num 7.77 7.6 7.55 7.49 7.49 ...
## $ GDP.per.capita : num 1.34 1.38 1.49 1.38 1.4 ...
## $ Social.support : num 1.59 1.57 1.58 1.62 1.52 ...
## $ Healthy.life.expectancy : num 0.986 0.996 1.028 1.026 0.999 ...
## $ Freedom.to.make.life.choices: num 0.596 0.592 0.603 0.591 0.557 0.572 0.574 0.585 0.584 0.532 ...
## $ Generosity : num 0.153 0.252 0.271 0.354 0.322 0.263 0.267 0.33 0.285 0.244 ...
## $ Perceptions.of.corruption : num 0.393 0.41 0.341 0.118 0.298 0.343 0.373 0.38 0.308 0.226 ...
# Print the same compact summary for the 2015 Country/Region lookup.
utils::str(df_2015)
## 'data.frame': 158 obs. of 2 variables:
## $ Country: Factor w/ 158 levels "Afghanistan",..: 136 59 38 106 25 46 100 135 101 7 ...
## $ Region : Factor w/ 10 levels "Australia and New Zealand",..: 10 10 10 10 6 10 10 10 1 1 ...
# Give the 2019 key column the same name as in df_2015 ("Country") so the two
# frames share a join key, then confirm the new column name.
df_2019 <- df_2019 %>%
  rename(Country = Country.or.region)
utils::str(df_2019)
## 'data.frame': 156 obs. of 9 variables:
## $ Overall.rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Country : Factor w/ 156 levels "Afghanistan",..: 44 37 106 58 99 134 133 100 24 7 ...
## $ Score : num 7.77 7.6 7.55 7.49 7.49 ...
## $ GDP.per.capita : num 1.34 1.38 1.49 1.38 1.4 ...
## $ Social.support : num 1.59 1.57 1.58 1.62 1.52 ...
## $ Healthy.life.expectancy : num 0.986 0.996 1.028 1.026 0.999 ...
## $ Freedom.to.make.life.choices: num 0.596 0.592 0.603 0.591 0.557 0.572 0.574 0.585 0.584 0.532 ...
## $ Generosity : num 0.153 0.252 0.271 0.354 0.322 0.263 0.267 0.33 0.285 0.244 ...
## $ Perceptions.of.corruption : num 0.393 0.41 0.341 0.118 0.298 0.343 0.373 0.38 0.308 0.226 ...
# Show df_2015 again for side-by-side comparison with the renamed df_2019.
utils::str(df_2015)
## 'data.frame': 158 obs. of 2 variables:
## $ Country: Factor w/ 158 levels "Afghanistan",..: 136 59 38 106 25 46 100 135 101 7 ...
## $ Region : Factor w/ 10 levels "Australia and New Zealand",..: 10 10 10 10 6 10 10 10 1 1 ...
# Attach each country's Region from the 2015 lookup.
# - The key is converted to character on both sides first, because Country is
#   a factor with different levels in the two frames and joining such factors
#   triggers an implicit coercion (with a warning).
# - The key is named explicitly instead of being inferred from shared names.
# Countries without a match in df_2015 keep their row and get Region = NA.
df_2019 <- df_2019 %>%
  mutate(Country = as.character(Country)) %>%
  left_join(
    mutate(df_2015, Country = as.character(Country)),
    by = "Country"
  )
## Joining, by = "Country"
## Warning: Column `Country` joining factors with different levels, coercing to
## character vector
# Explanatory variables that are each plotted against Score.
columns <- c(
  "GDP.per.capita", "Social.support", "Healthy.life.expectancy",
  "Freedom.to.make.life.choices", "Generosity", "Perceptions.of.corruption"
)

# Build a scatter plot of Score against one column of df_2019.
#
# column: name of the df_2019 column to put on the x axis (a string).
# Returns a ggplot object with points coloured by Region and a linear-model
# trend line. Legends are hidden so the panels fit together in one grid.
score_scatter <- function(column) {
  ggplot(data = df_2019, mapping = aes(x = !!sym(column), y = Score)) +
    # alpha is a fixed setting, so it belongs outside aes(); inside aes() it
    # would be treated as a data mapping and sent through the alpha scale.
    geom_point(aes(color = Region), alpha = 0.5, show.legend = FALSE) +
    geom_smooth(method = "lm")
}

p1 <- score_scatter(columns[1])
p2 <- score_scatter(columns[2])
p3 <- score_scatter(columns[3])
p4 <- score_scatter(columns[4])
p5 <- score_scatter(columns[5])
p6 <- score_scatter(columns[6])

# Lay the six panels out in three equal-height rows.
grid.arrange(p1, p2, p3, p4, p5, p6, nrow = 3, heights = c(4, 4, 4))
# One point per Region: mean GDP.per.capita on the x axis against mean Score
# on the y axis, coloured by Region.
mean <- df_2019 %>%
  group_by(Region) %>%
  summarise(
    mean_score = mean(Score),
    mean_income = mean(GDP.per.capita)
  ) %>%
  ggplot(mapping = aes(x = mean_income, y = mean_score)) +
  geom_point(aes(color = Region))
## Warning: Factor `Region` contains implicit NA, consider using
## `forcats::fct_explicit_na`
# Score by Region: the individual points coloured by Region, with a
# transparent, black-outlined boxplot drawn on top of them.
box <- df_2019 %>%
  ggplot(mapping = aes(x = Region, y = Score, colour = Region)) +
  geom_point(alpha = 0.7) +
  geom_boxplot(colour = "black", alpha = 0)

# Show the `mean` plot and the boxplot side by side.
grid.arrange(mean, box, ncol = 2)
# Pairwise correlations between Score and the explanatory variables listed in
# `columns`. The columns are picked by name instead of by position (3 to 9),
# so the result does not silently change if df_2019's column order does.
cor <- cor(df_2019[, c("Score", columns)])
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Reshape the correlation matrix to long form (columns Var1, Var2, value) and
# draw it as a tile heatmap shaded from white to red.
map_cor <- melt(cor)
map_cor %>%
  ggplot(mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "red")
# World map shaded by Score. Rows of `countries` are matched to map features
# by comparing the map's "name" property with the Country column.
data(worldgeojson, package = "highcharter")

countries <- df_2019 %>%
  select(Country, Score) %>%
  filter(!is.na(Country))

highchart() %>%
  hc_add_series_map(
    worldgeojson,
    countries,
    value = "Score",
    joinBy = c("name", "Country")
  ) %>%
  hc_colors(c("darkorange", "darkgray")) %>%
  hc_colorAxis(stops = color_stops(), min = 2.5, max = 8) %>%
  hc_title(text = "Happiness Score")
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Scatter of Score against GDP.per.capita, coloured by Region, converted to an
# interactive plotly widget.
p <- ggplot(
  data = df_2019,
  mapping = aes(x = GDP.per.capita, y = Score, color = Region)
) +
  geom_point() +
  theme_bw()
ggplotly(p)